
* Get total mortgage debt outstanding, including 2nd liens
* Note balances on 2nd liens are only available quarterly, not monthly
forvalues qq = 61(1)92 { //2nd-lien balances only available starting in q61
gen total_mortgage_debt`qq' = balq`qq'
replace total_mortgage_debt`qq' = total_mortgage_debt`qq' + close2ndq`qq' if !mi(close2ndq`qq') 
replace total_mortgage_debt`qq' = total_mortgage_debt`qq' + open2ndq`qq' if !mi(open2ndq`qq') 
}


* Get month and year for matching to BlackKnight HPI
gen bk_month     = mod(open_month,12)
replace bk_month = 12 if bk_month==0

* open_year is sometimes missing, and even when it is not, it sometimes is not
* consistent with the year in open_month.  So use open_month to figure out the
* year a mortgage was originated.  
gen bk_year = floor((open_month-bk_month)/12) + 1998

* For use getting HPIs, get the month the sample was drawn
* According to the technical documentation for the ASMB, this was
* "at the end of the preceding year"
gen      BK_survey_date = 201501 if survey_year==2016 
replace  BK_survey_date = 201601 if survey_year==2017 
replace  BK_survey_date = 201701 if survey_year==2018 


gen BK_orig_date = 100*bk_year + bk_month //date in the format used by BlackKnight


* Get quarter and year for matching to FHFA HPI
gen fhfa_quarter = ceil(bk_month/3)
gen fhfa_year = bk_year

*******************************************************************************
** HPI from FHFA by state
*******************************************************************************

* get hpi at origination
gen state   = geo_st
gen year    = fhfa_year
gen quarter = fhfa_quarter
merge m:1 state year quarter using "$FHFA/HPI_AT_state.dta", keepusing(hpi_state) gen(merge_fhfa_st_orig) keep(match master) // some failed matches, virtually all in Puerto Rico
rename hpi_state fhfa_st_hpi_orig

* get hpi at the time of the survey
replace quarter = 4
replace year = survey_year-2 // For e.g. 2016 ASMB, we want house prices on Dec. 31 2014

merge m:1 state year quarter using "$FHFA/HPI_AT_state.dta", keepusing(hpi_state) gen(merge_fhfa_st_survey) keep(match master) // some failed matches, virtually all in Puerto Rico
rename hpi_state fhfa_st_hpi_survey

* get MtM value at state level
gen MtM_value_FHFA_state = value * (fhfa_st_hpi_survey / fhfa_st_hpi_orig)

*******************************************************************************
** HPI from FHFA by county, which comes with the NMDB as hpiq
*******************************************************************************


gen orig_quarter   = ((bk_year - 1998) * 4) + fhfa_quarter
gen survey_quarter = ((survey_year - 2  - 1998)*4) + 4 // For e.g. 2016 ASMB, we want house prices on Dec. 31 2014


gen fhfa_county_hpi_orig = .
gen fhfa_county_hpi_survey = .

forvalues qq=1(1)92 {
	
	replace fhfa_county_hpi_orig   = hpiq`qq' if orig_quarter==`qq'
	replace fhfa_county_hpi_survey = hpiq`qq' if survey_quarter==`qq'
}

gen MtM_value_FHFA_county = value * (fhfa_county_hpi_survey / fhfa_county_hpi_orig)

*******************************************************************************
** HPI from FHFA by tract
*******************************************************************************

****************************************
****************************************
** Tract-level HPI data is only available on an annual basis.  So turn it into monthly
* by assuming constant within-year appreciation


preserve
use "$FHFA/HPI_AT_BDL_tract", clear
keep tract year hpi
bys tract: ipolate hpi year, gen(hpi_interpolated) // interpolate (but do not extrapolate!) when tracts are missing data for some years
replace hpi = hpi_interpolated
drop hpi_interpolated
drop if mi(hpi) // now that we've interpolated, drop years outside the range in which we have data for some tracts.  

gen month = ((year - 1998) * 12) + 6 // in the NMDB, month 1 is January 1998.  Tract-level HPIs are annual, so set them for June
drop year

tsset tract month
tsfill

bys tract: ipolate hpi month, gen(hpi_tract_m)
keep tract month hpi_tract_m

tostring tract, generate(geo2019_stcnty_tract_fips) format(%11.0f)
replace geo2019_stcnty_tract_fips = "0" + geo2019_stcnty_tract_fips if tract<10000000000

save "$FHFA/HPI_tract_processed", replace
restore



****************************************
****************************************
** Now work on the actual house price idices

gen month = open_month
gen tract = geo2019_stcnty_tract

tostring geo2019_stcnty_tract, generate(geo2019_stcnty_tract_fips) format(%11.0f)
replace geo2019_stcnty_tract_fips = "0" + geo2019_stcnty_tract_fips if tract<10000000000

merge m:1 geo2019_stcnty_tract_fips month using "$FHFA/HPI_tract_processed", keepusing(hpi_tract_m) keep(master match) gen(merge_fhfa_tract_orig) // about 3,800 successful matches, and 600 failures.  Most of the failures seem to be because of a genuine lack of tract-level HPI data

rename hpi_tract_m fhfa_tract_hpi_orig
drop month tract


* Now MtM value the month that delinquency was measured
gen month_sample_drawn      = 205 if survey_year==2016 //Month 205 is 1/1/2015
replace month_sample_drawn  = 217 if survey_year==2017 //Month 228 is 1/1/2016
replace  month_sample_drawn = 229 if survey_year==2018 //Month 240 is 1/1/2017

gen month = month_sample_drawn
merge m:1 geo2019_stcnty_tract_fips month using "$FHFA/HPI_tract_processed", keepusing(hpi_tract_m) keep(master match)  gen(merge_fhfa_tract_survey)
rename hpi_tract_m fhfa_tract_hpi_survey

gen MtM_value_FHFA_tract = value * (fhfa_tract_hpi_survey / fhfa_tract_hpi_orig)
drop month

* Finally, get MtM value the month the survey was sent out
gen month_survey_sent = 224 if survey_year==2016 //month 224 is August 2016
replace month_survey_sent = 235 if survey_year==2017 //month 235 is July 2017
replace month_survey_sent = 246 if survey_year==2018 //month 246 is June 2018

gen month = month_survey_sent
merge m:1 geo2019_stcnty_tract_fips month using "$FHFA/HPI_tract_processed", keepusing(hpi_tract_m) keep(master match)  gen(merge_fhfa_tract_timeofsurvey)
rename hpi_tract_m fhfa_tract_hpi_timeofsurvey

gen MtM_value_FHFA_timeofsurvey = value * (fhfa_tract_hpi_timeofsurvey / fhfa_tract_hpi_orig)
drop month


* Use MtM value at the county level, if tract is not available, at the time of the survey
gen fhfa_county_hpi_timeofsurvey    = hpiq74 if survey_year==2016 // q74 is 6/30/2016
replace fhfa_county_hpi_timeofsurvey = hpiq78 if survey_year==2017 // q78 is 6/30/2017
replace fhfa_county_hpi_timeofsurvey = hpiq82 if survey_year==2018 // q82 is 6/30/2018

replace MtM_value_FHFA_timeofsurvey = value * (fhfa_county_hpi_timeofsurvey / fhfa_county_hpi_orig) if mi(MtM_value_FHFA_timeofsurvey)

* 53 properties are still missing MtM values at the time of the survey from the FHFA
* Later when we compute BlackKnight MtM estimates, 
* we will want to use MtM at the BlackKnight coarse county level for these
* Get the dates for that here

gen      BK_timeofsurvey_date = 201608 if survey_year==2016 //2016 survey was sent August 2016
replace  BK_timeofsurvey_date = 201707 if survey_year==2017 //2017 survey was sent July 2017
replace  BK_timeofsurvey_date = 201806 if survey_year==2018 //2018 survey was sent June 2018

*******************************************************************************
** HPI from BlackKnight by state
*******************************************************************************

destring(geo_st_fips), gen(agg_value_code)
gen property_type_name = "All Property Types"

*** Get BK HPI at origination
* There are 118 unmatched observations.  These are all either in Puerto Rico (which is not in the BlackKnight data),
* or were originated in 1988 or earlier (before the BlackKnight HPIs start)
gen agg_date = BK_orig_date 
merge m:1 agg_value_code agg_date property_type_name using "$BK/state_only", keepusing(aggregated_hpi) keep(match master) gen(merge_bk_st_orig)
rename aggregated_hpi BK_st_hpi_orig

*** Get BK HPI at the time the survey is stratified
replace agg_date = BK_survey_date
merge m:1 agg_value_code agg_date property_type_name using "$BK/state_only", keepusing(aggregated_hpi) keep(match master) gen(merge_bk_st_survey) //some failed matches, virtually all in Puerto Rico (state_code==72).  A couple failed matches have state_code==0
rename aggregated_hpi BK_st_hpi_survey

*** Get BK HPI at the time the survey is taken
replace agg_date = BK_timeofsurvey_date
merge m:1 agg_value_code agg_date property_type_name using "$BK/state_only", keepusing(aggregated_hpi) keep(match master) gen(merge_bk_st_timeofsurvey)
rename aggregated_hpi BK_st_hpi_timeofsurvey

gen MtM_value_BK_st = value * (BK_st_hpi_survey / BK_st_hpi_orig) // the MtM value estimate using BlackKnight's state-level HPI
gen MtM_value_BK_st_timeofsurvey = value * (BK_st_hpi_timeofsurvey / BK_st_hpi_orig)

*******************************************************************************
** "Coarse" HPI from BlackKnight by county
*******************************************************************************
* Now, work on matching by county
drop agg_value_code
destring(geo_stcnty), gen(agg_value_code)

replace agg_date = BK_orig_date 
merge m:1 agg_value_code agg_date property_type_name using "$BK/county_only", keepusing(aggregated_hpi) keep(match master) gen(merge_bk_cnty_orig) //7 failed merges by county, that were successful by state.
rename aggregated_hpi BK_cnty_hpi_orig

replace agg_date = BK_survey_date 
merge m:1 agg_value_code agg_date property_type_name using "$BK/county_only", keepusing(aggregated_hpi) keep(match master) gen(merge_bk_cnty_survey) //7 failed merges by county, that were successful by state.
rename aggregated_hpi BK_cnty_hpi_survey

gen MtM_value_BK_cnty_coarse = value * (BK_cnty_hpi_survey / BK_cnty_hpi_orig) // the MtM value estimate using BlackKnight's county-level HPI



* Use BK coarse county data for MtM values at the time of the survey where necessary
replace agg_date = BK_timeofsurvey_date

merge m:1 agg_value_code agg_date property_type_name using "$BK/county_only", keepusing(aggregated_hpi) keep(match master) gen(merge_bk_cnty_timeofsurvey) 
rename aggregated_hpi BK_cnty_hpi_timeofsurvey

replace MtM_value_FHFA_timeofsurvey = value * (BK_cnty_hpi_timeofsurvey / BK_cnty_hpi_orig) if mi(MtM_value_FHFA_timeofsurvey) // still 19 misssings




*******************************************************************************
** "Fine" HPI from BlackKnight by county
* This was used in early versions of the paper but not later ones.
*******************************************************************************

if $BKfine_bool == 1 {

drop property_type_name
gen property_type_name = "Condo" if inlist(x18,4,5) //multifamily, apartment, or condo
replace property_type_name = "Single Family" if mi(property_type_name)


* First at origination
replace agg_date = BK_orig_date 
merge m:1 agg_value_code agg_date property_type_name using "$BK/county_only", keepusing(price_tier*) keep(match master) gen(merge_bk_cnty_orig2)
drop price_tier*sa

gen BK_price_tier = .
gen diff_pt = .
foreach pt in 1 2 3 4 5 {
	gen diff_pt_`pt' = abs(value - price_tier_`pt')
	
	replace BK_price_tier = `pt' if diff_pt_`pt' < diff_pt
	replace diff_pt = diff_pt_`pt' if BK_price_tier == `pt'
	
}

gen BK_cnty_hpi_fine_orig   = .
foreach pt in 1 2 3 4 5 {
	replace BK_cnty_hpi_fine_orig = price_tier_`pt' if BK_price_tier == `pt'
}

* Now at the time of the survey
replace agg_date = BK_survey_date 
drop price_tier*
merge m:1 agg_value_code agg_date property_type_name using "$BK/county_only", keepusing(price_tier*) keep(match master) gen(merge_bk_cnty_survey2)
drop price_tier*sa


gen BK_cnty_hpi_fine_survey = .
foreach pt in 1 2 3 4 5 {	
	replace BK_cnty_hpi_fine_survey  = price_tier_`pt' if BK_price_tier == `pt'
}

gen MtM_value_BK_cnty_fine = value * (BK_cnty_hpi_fine_survey / BK_cnty_hpi_fine_orig) // the MtM value estimate using BlackKnight's county-level HPI

}

else {
	
	gen MtM_value_BK_cnty_fine = .
}

*******************************************************************************
** Use the MtM values to get LTV estimates
* Also, get an indicator for second mortgages
*******************************************************************************

* Total mortgage debt is only available quarterly, but this is not an issue, since we 
* want debt outstanding at the end of the year anyway
gen total_mortgage_debt_by_survey = total_mortgage_debt68 if survey_year==2016 & total_mortgage_debt68!=0 // 12/31/2014
gen second_mortgage_by_survey     = (total_mortgage_debt68 > balq68) if survey_year==2016 & total_mortgage_debt68!=0  & !mi(total_mortgage_debt68)


* Sometimes servicers stop reporting balances before loans are actually closed/charged-off, so
* some loans in the ASMB have missing balance at the time they were included in the survey.
* For these, use the most recent nonzero balance, going back up to 6 quarters
forvalues qq = 67(-1)62 {
	replace total_mortgage_debt_by_survey = total_mortgage_debt`qq' if mi(total_mortgage_debt_by_survey) & !mi(total_mortgage_debt`qq') & total_mortgage_debt`qq'!=0 & survey_year==2016 
	replace second_mortgage_by_survey     = 1 if (total_mortgage_debt`qq' > balq`qq') & survey_year==2016 & !mi(total_mortgage_debt`qq')
}


replace total_mortgage_debt_by_survey = total_mortgage_debt72 if survey_year==2017 & total_mortgage_debt72!=0 // 12/31/2015
replace second_mortgage_by_survey     = (total_mortgage_debt72 > balq72) if survey_year==2017 & total_mortgage_debt72!=0  & !mi(total_mortgage_debt72)
forvalues qq = 71(-1)66 {
	replace total_mortgage_debt_by_survey = total_mortgage_debt`qq' if mi(total_mortgage_debt_by_survey) & !mi(total_mortgage_debt`qq') & total_mortgage_debt`qq'!=0 & survey_year==2017
	replace second_mortgage_by_survey     = 1 if (total_mortgage_debt`qq' > balq`qq') & survey_year==2017 & !mi(total_mortgage_debt`qq')

}


replace total_mortgage_debt_by_survey = total_mortgage_debt76 if survey_year==2018 & total_mortgage_debt76!=0 // 12/31/2016
replace second_mortgage_by_survey     = (total_mortgage_debt76 > balq76) if survey_year==2018 & total_mortgage_debt76!=0  & !mi(total_mortgage_debt76)
forvalues qq = 75(-1)70 {
	replace total_mortgage_debt_by_survey = total_mortgage_debt`qq' if mi(total_mortgage_debt_by_survey) & !mi(total_mortgage_debt`qq') & total_mortgage_debt`qq'!=0 & survey_year==2018
	replace second_mortgage_by_survey     = 1 if (total_mortgage_debt`qq' > balq`qq') & survey_year==2018 & !mi(total_mortgage_debt`qq')
}

gen LTV_BK_st          = 100 * (total_mortgage_debt_by_survey / MtM_value_BK_st)
gen LTV_BK_cnty_coarse = 100 * (total_mortgage_debt_by_survey / MtM_value_BK_cnty_coarse)
gen LTV_BK_cnty_fine   = 100 * (total_mortgage_debt_by_survey / MtM_value_BK_cnty_fine)

gen LTV_FHFA_st        = 100 * (total_mortgage_debt_by_survey / MtM_value_FHFA_st)
gen LTV_FHFA_county    = 100 * (total_mortgage_debt_by_survey / MtM_value_FHFA_county)
gen LTV_FHFA_tract     = 100 * (total_mortgage_debt_by_survey / MtM_value_FHFA_tract)

* Use coarse BK data when missing FHFA county. Use FHFA state when BK county is unavailable. 
replace LTV_FHFA_county = LTV_BK_cnty_coarse if mi(LTV_FHFA_county)
replace LTV_FHFA_county = LTV_FHFA_st if mi(LTV_FHFA_county)

*Use FHFA county (including substitutes where necessary) when missing FHFA tract.
replace LTV_FHFA_tract  = LTV_FHFA_county if mi(LTV_FHFA_tract)

* FHFA state HPI is missing from Puerto Rico because it doesn't match
* FHFA county HPI comes with the NMDB and is available for Puerto Rico.  hpiq does not vary with Puerto Rico, so it seems to apply for the whole territory.
* Therefore, use FHFA county-level LTV for Puerto Rico is missing
replace LTV_FHFA_st = LTV_FHFA_county if mi(LTV_FHFA_st) & state=="PR"


* Get an indicator for a second mortgage by the time of the survey
foreach var in total_mortgage_debt balq {
gen `var'_timeofsurvey     = `var'74 if survey_year==2016  // q74 is 6/30/2016
replace `var'_timeofsurvey = `var'78 if survey_year==2017 // q78 is 6/30/2017
replace `var'_timeofsurvey = `var'82 if survey_year==2018 // q82 is 6/30/2018
}

gen LTV_FHFA_timeofsurvey     = 100 * (total_mortgage_debt_timeofsurvey / MtM_value_FHFA_timeofsurvey)
gen second_mortgage_timeofsurvey = (total_mortgage_debt_timeofsurvey >  balq_timeofsurvey) & !mi(total_mortgage_debt_timeofsurvey)

/*******************************************************************************
** Generate binary effectively underwater flags
*******************************************************************************/

* First, start by using the MtM values
foreach var of varlist LTV* {
	gen EUW_`var' = (`var'>=90) if !mi(`var') 
	}
	
cap gen EUW_or_disaster       = (EUW_LTV_FHFA_tract==1) | (x90g==1) // this line will only work for ASMB respondents, when we have x90g
cap replace EUW_or_disaster   = . if mi(EUW_LTV_FHFA_tract) & (x90g==2)
